/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package com.yyl.weibospider.gather.operate.impl;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.yyl.weibospider.gather.domain.WeiboBean;
import com.yyl.weibospider.gather.operate.WeiboContextParse;
import com.yyl.weibospider.gather.util.WeiboUtil;

/**
 * Jsoup-based implementation of {@link WeiboContextParse}.
 *
 * <p>
 * Extracts {@link WeiboBean} instances from a feed payload and pulls the
 * numeric page id out of a user page. Parsing is best-effort: a post that
 * cannot be read is skipped rather than failing the whole batch.
 */
public class WeiboContextParseImpl implements WeiboContextParse {

	/** Matches {@code ['page_id']='<digits>'} and captures the digits. */
	private static final Pattern PAGE_ID_PATTERN = Pattern
			.compile("\\['page_id'\\]='(\\d+)'");

	/**
	 * Removes the text around a parenthesised counter, e.g. {@code "x(12)"}
	 * becomes {@code "12"}.
	 */
	private static final Pattern COUNT_WRAPPER_PATTERN = Pattern
			.compile(".*\\(|\\).*");

	/** Removes everything up to and including the last {@code '='}. */
	private static final Pattern KEY_PREFIX_PATTERN = Pattern.compile(".*=");

	public WeiboContextParseImpl() {
	}

	/**
	 * Parses every {@code div.WB_feed_type} node found in the given payload
	 * into a {@link WeiboBean}.
	 *
	 * <p>
	 * The payload is first passed through
	 * {@link WeiboUtil#jsonTransform(String)} (the feed HTML is delivered
	 * wrapped in JSON) and the result is parsed with Jsoup. When a post is
	 * flagged as a forward ({@code isforward="1"}) and the {@code tbinfo}
	 * attribute carries a second field, the forwarded post is parsed as well
	 * and attached through {@code setoWeibo}.
	 *
	 * @param text
	 *            raw feed payload
	 * @return the posts that could be parsed, in document order; empty (never
	 *         {@code null}) when nothing could be parsed
	 */
	public List<WeiboBean> parse(String text) {

		List<WeiboBean> weibos = new ArrayList<WeiboBean>();

		try {
			// The feed HTML holds several posts but arrives embedded in JSON,
			// so it has to be unwrapped before Jsoup can parse it.
			String html = WeiboUtil.jsonTransform(text);
			Document doc = Jsoup.parse(html);

			// One node per post.
			Elements posts = doc.select("div.WB_feed_type");

			for (Element ele : posts) {
				try {
					// Fields are '&'-separated; each value follows an '='
					// (presumably "key=value" pairs - confirm against real
					// markup).
					String[] tbinfo = ele.attr("tbinfo").split("&");
					boolean isForward = "1".equals(ele.attr("isforward")
							.trim());

					WeiboBean weibo = parsePost(ele, tbinfo[0], isForward);
					// Added before the forwarded post is handled so that a
					// failure there does not discard the post itself.
					weibos.add(weibo);

					// The second tbinfo field (author of the forwarded post)
					// is missing when the original post has been deleted;
					// there is nothing to attach in that case.
					if (isForward && tbinfo.length > 1) {
						weibo.setoWeibo(parseForwardedPost(ele, tbinfo[1]));
					}
				} catch (Exception ignored) {
					// Best-effort: a malformed post must not abort the
					// remaining posts.
				}
			}
		} catch (Exception ignored) {
			// Best-effort: an unreadable payload yields whatever was
			// collected so far (normally an empty list).
		}
		return weibos;
	}

	/**
	 * Extracts the page id from a user page.
	 *
	 * @param text
	 *            page source expected to contain
	 *            {@code ['page_id']='<digits>'}
	 * @return the digits of the first match, or {@code null} when
	 *         {@code text} is {@code null}, empty, or contains no match
	 */
	public String parseUserPageId(String text) {

		if (text == null || text.isEmpty()) {
			return null;
		}

		Matcher mat = PAGE_ID_PATTERN.matcher(text);
		return mat.find() ? mat.group(1) : null;
	}

	/** Builds the bean for the post itself from its feed node. */
	private static WeiboBean parsePost(Element ele, String authorField,
			boolean isForward) {

		WeiboBean weibo = new WeiboBean();
		// Post id.
		weibo.setWeiboID(ele.attr("mid"));
		// Author id.
		weibo.setuID(valueOf(authorField));
		// Post text.
		weibo.setWeiboContext(ele.select("div.WB_detail > div.WB_text")
				.text());
		// Device / app the post was sent from.
		weibo.setWeiboDev(ele.select(
				"div.WB_detail > div.WB_func a[action-type=app_source]")
				.text());
		// Publication time.
		weibo.setWeiboTime(ele.select(
				"div.WB_detail > div.WB_func a.WB_time").attr("title"));

		// Like / forward / comment counters.
		weibo.setLikeNum(WeiboUtil.stringToInt(countText(ele,
				"div.WB_detail > div.WB_func div.WB_handle a[action-type=fl_like]")));
		weibo.setForwardNum(WeiboUtil.stringToInt(countText(ele,
				"div.WB_detail > div.WB_func div.WB_handle a[action-type=fl_forward]")));
		weibo.setCommentNum(WeiboUtil.stringToInt(countText(ele,
				"div.WB_detail > div.WB_func div.WB_handle a[action-type=fl_comment]")));

		// Whether this post forwards another one.
		weibo.setFoward(isForward ? 1 : 0);
		return weibo;
	}

	/** Builds the bean for the forwarded (original) post embedded in a feed node. */
	private static WeiboBean parseForwardedPost(Element ele,
			String authorField) {

		WeiboBean oWeibo = new WeiboBean();
		// Author id of the forwarded post.
		oWeibo.setuID(valueOf(authorField));
		// Id of the forwarded post.
		oWeibo.setWeiboID(ele.attr("omid"));
		// Text of the forwarded post.
		oWeibo.setWeiboContext(ele.select("div.WB_media_expand div.WB_text")
				.text());
		// Device / app the forwarded post was sent from.
		oWeibo.setWeiboDev(ele.select(
				"div.WB_media_expand div.WB_func a[action-type=app_source]")
				.text());
		// Publication time of the forwarded post.
		oWeibo.setWeiboTime(ele.select(
				"div.WB_media_expand   div.WB_func a.WB_time").attr("title"));

		// Like / forward / comment counters of the forwarded post. The
		// forward and comment links are located by their link text here.
		oWeibo.setLikeNum(WeiboUtil.stringToInt(countText(ele,
				"div.WB_media_expand  div.WB_func div.WB_handle a[action-type=fl_like]")));
		oWeibo.setForwardNum(WeiboUtil.stringToInt(countText(ele,
				"div.WB_media_expand  div.WB_func div.WB_handle a:matches(^转发)")));
		oWeibo.setCommentNum(WeiboUtil.stringToInt(countText(ele,
				"div.WB_media_expand  div.WB_func div.WB_handle a:matches(^评论)")));
		return oWeibo;
	}

	/** Returns the part of a {@code tbinfo} field that follows its last '='. */
	private static String valueOf(String field) {
		return KEY_PREFIX_PATTERN.matcher(field).replaceAll("");
	}

	/** Returns the selected link text with the wrapper around its counter removed. */
	private static String countText(Element ele, String selector) {
		String raw = ele.select(selector).text();
		return COUNT_WRAPPER_PATTERN.matcher(raw).replaceAll("");
	}

}
